********************************************************************************
*DYNAMIC IMPACTS OF PRICING GROUNDWATER
*Bruno, Jessoe, Hanemann in JAERE
*
*Generate and clean data for land use event study regressions
*Input from "Land_preprocessing"
*Output here is used for "Land_eventstudy.do"
********************************************************************************

* ---- Session setup ----
* Start from an empty session, turn off the screen-paging prompt, and close
* any log file that is still open (capture suppresses the error if none is).
clear all
set more off
capture log close

* Date stamp used in the names of the files this script saves
global outputdate "20230615"

* Project root directory; the relative Data\ paths below resolve against it
cd "D:\Ellen\Dropbox\Pajaro_AgInnovation"

********************************************************************************
* Third-quarter snapshot of the price data.
*   - keep quarter 3 only
*   - exclude years before 2009 and the year 2010 (rows with a missing year
*     are retained, because missing compares greater than any number)
*   - set a system-missing delivered_rate to 0 where inside equals 0
use "Data\Price_data_20210527.dta", clear
keep if quarter == 3 & year >= 2009 & year != 2010
replace delivered_rate = 0 if inside == 0 & delivered_rate == .
save "Data\Price_Q3_data_20210601.dta", replace
********************************************************************************
********************************************************************************

* Load the parcel extraction data; exclude years before 2009 and the year 2010
use "Data\parcel_extraction_20210601.dta", clear
keep if year >= 2009 & year != 2010

* Attach electricity rates, matched on year and quarter
merge m:1 year quarter using "Data\electricity_rates_20210601.dta"
drop _merge

* Land use is annual, so aggregate to one row per
* year-inside-parcelnum-county_code combination:
* extraction is summed; the other variables are averaged.
* NOTE(review): collapse (sum) yields 0, not missing, when every value in a
* group is missing -- confirm that is intended, given that rows with missing
* extraction are dropped further below.
collapse (sum)  extraction                        ///
         (mean) gw_depth rent flat time_of_use,   ///
         by(year inside parcelnum county_code)

* Merge in the remaining inputs. Unmatched rows from either side are kept by
* each merge; only the _merge indicator is discarded.

* Q3 price snapshot, matched on year and inside
merge m:1 year inside using "Data\Price_Q3_data_20210601.dta"
drop _merge

* Delivered water, matched on year and inside
merge m:1 year inside using "Data\Delivered_water_yearrun_20210603.dta"
drop _merge

* Chloride, matched on inside and year
merge m:1 inside year using "Data\Pajaro_zonalyr_chloride_20220729.dta"
drop _merge

* Parcel land use, matched on parcelnum and year
merge m:1 parcelnum year using "Data\parcel_landuse_watermerge_LH_20230615.dta"
drop _merge



* Variable labels
* -- parcel-level extraction and zone indicator
label var extraction     "Extraction"
label var inside         "Inside DWZ"
* -- variables brought in by the merges
label var delivered_rate "Delivered Rate"
label var delivered      "Recycled Deliveries"
label var CL_zoneyear    "Chloride"

* Sample restrictions: remove rows whose extraction is system-missing or
* negative, and rows without a parcel identifier
drop if extraction == . | extraction < 0 | parcelnum == .

* Declare the panel structure: parcelnum is the panel id, year the time variable
xtset parcelnum year

*GENERATING
* Cluster variable: one integer id per inside-by-year combination
egen zone_year = group(inside year)

* Year measured relative to 2000
gen time = year - 2000

* Post-period indicator: 1 for years after 2010, 0 otherwise.
* Stata treats a missing value as larger than any number, so an unguarded
* year>2010 comparison would flag rows with a missing year as post-period.
* The !missing() guard leaves post (and therefore treatment) missing for
* such rows instead.
gen post = (year > 2010) if !missing(year)
label variable post "Post-2010"

* Treatment indicator: inside interacted with post
gen treatment = inside*post
label variable treatment "Inside*Post"


sort year parcelnum

* Time-invariant parcel size: the mean of acres_tot over each parcel's rows.
* It is computed on a temporary copy of the data (preserve / restore),
* written to disk, and then merged back onto every row of the parcel.
preserve
collapse (mean) parcel_size_fixed = acres_tot, by(parcelnum)
save "Data\parcel_size_fixed_$outputdate.dta", replace
restore

merge m:1 parcelnum using "Data\parcel_size_fixed_$outputdate.dta"
drop _merge


* Express acres0 and acres_irr as shares of the fixed parcel size
generate fallow_peracre = acres0    / parcel_size_fixed
generate irr_peracre    = acres_irr / parcel_size_fixed

* Order rows by parcel and year, then write the cleaned data set
sort parcelnum year
save "Data\Land_clean_$outputdate.dta", replace
